{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "KeyError",
     "evalue": "'edit_functions'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[5], line 15\u001b[0m\n\u001b[1;32m     13\u001b[0m \u001b[38;5;66;03m# eval with dataset\u001b[39;00m\n\u001b[1;32m     14\u001b[0m locagent_loc_file \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mloc_output/locagent/claude_3-5/loc_outputs.jsonl\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m---> 15\u001b[0m locagent_res \u001b[38;5;241m=\u001b[39m \u001b[43mevaluate_results\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlocagent_loc_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     16\u001b[0m \u001b[43m                        \u001b[49m\u001b[43mlevel2key_dict\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     17\u001b[0m \u001b[43m                        \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43macc\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m     18\u001b[0m \u001b[43m                        \u001b[49m\u001b[43mdataset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/data_ext/ref_code/LocAgent/data/data/SWE-bench_Lite\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m     19\u001b[0m \u001b[43m                        \u001b[49m\u001b[38;5;66;43;03m# metrics=['ndcg'],\u001b[39;49;00m\n\u001b[1;32m     20\u001b[0m \u001b[43m                        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     21\u001b[0m locagent_res\n",
      "File \u001b[0;32m/data_ext/ref_code/LocAgent/evaluation/eval_metric.py:438\u001b[0m, in \u001b[0;36mevaluate_results\u001b[0;34m(loc_file, level2key_dict, dataset, split, selected_list, metrics, k_values_list)\u001b[0m\n\u001b[1;32m    436\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m k_values_list:\n\u001b[1;32m    437\u001b[0m     k_values_list \u001b[38;5;241m=\u001b[39m [[\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m5\u001b[39m], [\u001b[38;5;241m5\u001b[39m, \u001b[38;5;241m10\u001b[39m], [\u001b[38;5;241m5\u001b[39m, \u001b[38;5;241m10\u001b[39m]]\n\u001b[0;32m--> 438\u001b[0m file_res \u001b[38;5;241m=\u001b[39m \u001b[43mcal_metrics_w_dataset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    439\u001b[0m \u001b[43m    \u001b[49m\u001b[43mloc_file\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    440\u001b[0m \u001b[43m    \u001b[49m\u001b[43mlevel2key_dict\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    441\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfile\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m    442\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    443\u001b[0m \u001b[43m    \u001b[49m\u001b[43msplit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    444\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmetrics\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetrics\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    445\u001b[0m \u001b[43m    \u001b[49m\u001b[43mk_values\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mk_values_list\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    446\u001b[0m \u001b[43m    
\u001b[49m\u001b[43mselected_list\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mselected_list\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    447\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    448\u001b[0m module_res \u001b[38;5;241m=\u001b[39m cal_metrics_w_dataset(\n\u001b[1;32m    449\u001b[0m     loc_file,\n\u001b[1;32m    450\u001b[0m     level2key_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodule\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    456\u001b[0m     selected_list\u001b[38;5;241m=\u001b[39mselected_list,\n\u001b[1;32m    457\u001b[0m )\n\u001b[1;32m    458\u001b[0m function_res \u001b[38;5;241m=\u001b[39m cal_metrics_w_dataset(\n\u001b[1;32m    459\u001b[0m     loc_file,\n\u001b[1;32m    460\u001b[0m     level2key_dict[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfunction\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    466\u001b[0m     selected_list\u001b[38;5;241m=\u001b[39mselected_list,\n\u001b[1;32m    467\u001b[0m )\n",
      "File \u001b[0;32m/data_ext/ref_code/LocAgent/evaluation/eval_metric.py:336\u001b[0m, in \u001b[0;36mcal_metrics_w_dataset\u001b[0;34m(loc_file, key, eval_level, dataset, split, k_values, metrics, selected_list)\u001b[0m\n\u001b[1;32m    334\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m instance \u001b[38;5;129;01min\u001b[39;00m bench_data:\n\u001b[1;32m    335\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m eval_level \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfile\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 336\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m func \u001b[38;5;129;01min\u001b[39;00m \u001b[43minstance\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43medit_functions\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m:\n\u001b[1;32m    337\u001b[0m             fn \u001b[38;5;241m=\u001b[39m func\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m:\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m    338\u001b[0m             \u001b[38;5;28;01mif\u001b[39;00m fn \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m gt_dict[instance[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minstance_id\u001b[39m\u001b[38;5;124m\"\u001b[39m]]:\n",
      "\u001b[0;31mKeyError\u001b[0m: 'edit_functions'"
     ]
    }
   ],
   "source": [
    "import sys\n",
    "\n",
    "# Checkout of LocAgent that provides the `evaluation` package imported below.\n",
    "LOCAGENT_ROOT = \"/data_ext/ref_code/LocAgent\"\n",
    "if LOCAGENT_ROOT not in sys.path:  # re-running the cell must not stack duplicate entries\n",
    "    sys.path.insert(0, LOCAGENT_ROOT)\n",
    "\n",
    "from evaluation.eval_metric import evaluate_results\n",
    "\n",
    "# For each evaluation level, the key handed to the metric code together with the loc file.\n",
    "level2key_dict = {\n",
    "    'file': 'found_files',\n",
    "    'module': 'found_modules',\n",
    "    'function': 'found_entities',\n",
    "}\n",
    "\n",
    "# eval with dataset\n",
    "locagent_loc_file = 'loc_output/locagent/claude_3-5/loc_outputs.jsonl'\n",
    "\n",
    "# NOTE(review): the file-level metric reads instance['edit_functions'] from every\n",
    "# dataset row. The last saved run of this cell raised KeyError('edit_functions'),\n",
    "# so this local copy apparently lacks that column -- confirm that it is the\n",
    "# variant carrying the ground-truth edit locations before trusting any result.\n",
    "dataset_path = \"/data_ext/ref_code/LocAgent/data/data/SWE-bench_Lite\"\n",
    "\n",
    "locagent_res = evaluate_results(\n",
    "    locagent_loc_file,\n",
    "    level2key_dict,\n",
    "    metrics=['acc'],  # alternative: metrics=['ndcg']\n",
    "    dataset=dataset_path,\n",
    ")\n",
    "locagent_res"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "pylocagent",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
